CelebA dataset (built manually)

With a TensorFlow dataset, you can inspect its contents by wrapping dataset.as_numpy_iterator() in list().
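For example, a minimal sketch with a throwaway toy dataset (the values are made up just to show the call):

import tensorflow as tf

# toy dataset used only to illustrate as_numpy_iterator()
ds = tf.data.Dataset.from_tensor_slices([1, 2, 3])
print(list(ds.as_numpy_iterator()))   # [1, 2, 3]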
import tensorflow as tf
import pathlib
import numpy as np
import tensorflow_datasets as tfds
from PIL import Image

TRAIN_NUM = 16000
VALID_NUM = 1000

tf.random.set_seed(1)

def count_items(ds):
    n = 0
    for _ in ds:
        n += 1
    return n

imgdir_path = pathlib.Path('/Users/csian/tensorflow_datasets/downloads/manual/img_align_celeba/')
file_list = sorted([str(path) for path in imgdir_path.glob('*.jpg')])

# parse the 'Male' attribute (0: female, 1: male) from list_attr_celeba.txt
fp = open('/Users/csian/tensorflow_datasets/downloads/manual/list_attr_celeba.txt', 'r')
line = fp.readline()
leng = int(line)                 # first line: total number of images
line = fp.readline()
index = line.split()             # second line: the attribute names
loc = index.index('Male')
sex = []
for i in range(leng):
    line = fp.readline()
    line = line[11:-1]           # drop the file name and the trailing newline
    line = line.split()          # split on whitespace runs (values are padded with 1 or 2 spaces)
    if line[loc] == '-1':
        li = 0
    else:
        li = 1
    sex.append(li)
fp.close()

labels = tf.convert_to_tensor(sex)
labels = labels[:TRAIN_NUM+VALID_NUM]

# load the first TRAIN_NUM+VALID_NUM images into memory as tensors
tensor_list = []
for i in range(TRAIN_NUM+VALID_NUM):
    image = Image.open(file_list[i])
    data = np.array(image)
    td = tf.convert_to_tensor(data)
    tensor_list.append(td)
    print('%d / %d' % (i+1, TRAIN_NUM+VALID_NUM))

ds = tf.data.Dataset.from_tensor_slices((tensor_list, labels))
ds = ds.shuffle(TRAIN_NUM+VALID_NUM, reshuffle_each_iteration=False)
celeba_train = ds.take(TRAIN_NUM)
celeba_valid = ds.skip(TRAIN_NUM)
print('Training dataset: {}'.format(count_items(celeba_train)))
print('Validation dataset: {}'.format(count_items(celeba_valid)))

Training dataset: 16000

Validation dataset: 1000
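To sanity-check the split, a few samples can be inspected with the as_numpy_iterator() approach mentioned above (a minimal sketch; the raw aligned CelebA images should come out as 218x178x3 uint8 arrays):

# peek at the first three (image, label) pairs of the training split
for image, label in celeba_train.take(3).as_numpy_iterator():
    print(image.shape, image.dtype, int(label))   # e.g. (218, 178, 3) uint8 0 or 1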

Dataset preprocessing
import numpy as np

BATCH_SIZE = 32
BUFFER_SIZE = 1000
IMAGE_SIZE = (64, 64)
steps_per_epoch = np.ceil(TRAIN_NUM/BATCH_SIZE)

def preprocess(example, size=(64, 64), mode='train'):
    image = example[0]
    label = example[1]
    if mode == 'train':
        # training: random 178x178 crop, resize, random horizontal flip
        image_cropped = tf.image.random_crop(image, size=(178, 178, 3))
        image_resized = tf.image.resize(image_cropped, size=size)
        image_flip = tf.image.random_flip_left_right(image_resized)
        return image_flip/255.0, tf.cast(label, tf.int32)
    else:
        # evaluation: deterministic crop, resize only
        image_cropped = tf.image.crop_to_bounding_box(image, offset_height=20, offset_width=0, target_height=178, target_width=178)
        image_resized = tf.image.resize(image_cropped, size=size)
        return image_resized/255.0, tf.cast(label, tf.int32)

labels = []
tensor_list = []
for example in celeba_train:
    img, lab = preprocess(example, size=IMAGE_SIZE, mode='train')
    img = tf.convert_to_tensor(img)
    lab = tf.convert_to_tensor(lab)
    tensor_list.append(img)
    labels.append(lab)
ds_train = tf.data.Dataset.from_tensor_slices((tensor_list, labels))
ds_train = ds_train.shuffle(buffer_size=BUFFER_SIZE).repeat()
ds_train = ds_train.batch(BATCH_SIZE)

labels = []
tensor_list = []
for example in celeba_valid:
    img, lab = preprocess(example, size=IMAGE_SIZE, mode='eval')
    img = tf.convert_to_tensor(img)
    lab = tf.convert_to_tensor(lab)
    tensor_list.append(img)
    labels.append(lab)
ds_valid = tf.data.Dataset.from_tensor_slices((tensor_list, labels))
ds_valid = ds_valid.batch(BATCH_SIZE)
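Before training, it is worth checking that one batch comes out with the expected shape and value range (a minimal sketch using the ds_train built above):

# pull a single batch and verify its shape and scaling
batch_images, batch_labels = next(iter(ds_train))
print(batch_images.shape, batch_labels.shape)   # expected: (32, 64, 64, 3) (32,)
print(float(tf.reduce_min(batch_images)), float(tf.reduce_max(batch_images)))   # values should lie in [0, 1]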

>>> celeba_train
<TensorSliceDataset shapes: ((64, 64, 3), ()), types: (tf.float32, tf.int32)>
>>> len(celeba_train)
16000
>>> celeba_valid
<TensorSliceDataset shapes: ((64, 64, 3), ()), types: (tf.float32, tf.int32)>
>>> len(celeba_valid)
1000
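Since ds_train repeats indefinitely, training code has to pass steps_per_epoch so Keras knows where an epoch ends. The model below is only an illustrative placeholder (this section does not define one); a hedged sketch:

# hypothetical small CNN, only to show how ds_train / ds_valid / steps_per_epoch plug into fit()
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(64, 64, 3)),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(ds_train, validation_data=ds_valid,
                    epochs=1, steps_per_epoch=int(steps_per_epoch))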

Test dataset (batch size = 32)
import tensorflow as tf
import pathlib
import numpy as np
import tensorflow_datasets as tfds
from PIL import Image

TEST_NUM = 32
TRAIN_NUM = 16000      # must match the values used when building the train/valid splits
VALID_NUM = 1000
IMAGE_SIZE = (64, 64)

imgdir_path = pathlib.Path('/Users/csian/tensorflow_datasets/downloads/manual/img_align_celeba/')
file_list = sorted([str(path) for path in imgdir_path.glob('*.jpg')])

def preprocess(example, size=(64, 64), mode='train'):
    image = example[0]
    label = example[1]
    if mode == 'train':
        image_cropped = tf.image.random_crop(image, size=(178, 178, 3))
        image_resized = tf.image.resize(image_cropped, size=size)
        image_flip = tf.image.random_flip_left_right(image_resized)
        return image_flip/255.0, tf.cast(label, tf.int32)
    else:
        image_cropped = tf.image.crop_to_bounding_box(image, offset_height=20, offset_width=0, target_height=178, target_width=178)
        image_resized = tf.image.resize(image_cropped, size=size)
        return image_resized/255.0, tf.cast(label, tf.int32)

# parse the 'Male' attribute again for the test images
fp = open('/Users/csian/tensorflow_datasets/downloads/manual/list_attr_celeba.txt', 'r')
line = fp.readline()
leng = int(line)
line = fp.readline()
index = line.split()
loc = index.index('Male')
sex = []
for i in range(leng):
    line = fp.readline()
    line = line[11:-1]
    line = line.split()          # split on whitespace runs (values are padded with 1 or 2 spaces)
    if line[loc] == '-1':
        li = 0
    else:
        li = 1
    sex.append(li)
fp.close()

labels = tf.convert_to_tensor(sex)
labels = labels[TRAIN_NUM+VALID_NUM:TRAIN_NUM+VALID_NUM+TEST_NUM]

# load the TEST_NUM images that come right after the train/valid images
tensor_list = []
for k in range(TEST_NUM):
    i = TRAIN_NUM + VALID_NUM + k
    image = Image.open(file_list[i])
    data = np.array(image)
    td = tf.convert_to_tensor(data)
    tensor_list.append(td)
    print('%d / %d' % (k+1, TEST_NUM))

ds = tf.data.Dataset.from_tensor_slices((tensor_list, labels))

labels = []
tensor_list = []
for example in ds:
    # note: mode='train' applies the random crop/flip to the test images as well
    img, lab = preprocess(example, size=IMAGE_SIZE, mode='train')
    img = tf.convert_to_tensor(img)
    lab = tf.convert_to_tensor(lab)
    tensor_list.append(img)
    labels.append(lab)
ds_test = tf.data.Dataset.from_tensor_slices((tensor_list, labels))
ds_test = ds_test.batch(32)
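ds_test holds a single batch of 32 preprocessed images, so it can be fed straight into evaluation or prediction once a trained model exists (the model calls below are hypothetical and left commented out); a quick shape check:

# images and labels of the one-and-only test batch
images, labels = next(iter(ds_test))
print(images.shape, labels.shape)   # expected: (32, 64, 64, 3) (32,)

# with a trained Keras classifier (hypothetical, not built in this section):
# results = model.evaluate(ds_test)
# preds = model.predict(ds_test)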